package com.skz.job.spider;

import lombok.Data;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.ArrayList;
import java.util.List;

/**
 * WebMagic {@link PageProcessor} that reads two table columns from the element
 * with id {@code freelist} and keeps them in {@link #ips} and {@link #ports}.
 *
 * <p>Lombok's {@code @Data} generates the getters/setters (plus
 * {@code equals}/{@code hashCode}/{@code toString}) for the two list fields.
 * Every call to {@link #process(Page)} replaces both lists with the values
 * extracted from the page it was given; nothing is accumulated across pages.
 *
 * <p>NOTE(review): judging by the field names, column 1 presumably holds IP
 * addresses and column 2 the matching ports — confirm against the target page.
 *
 * @author 宋开宗
 * @since 2019-02-19 12:07
 */
@Data
public class ProxyIpSpider implements PageProcessor {
    /** XPath selecting the text of the first cell of every row in the "freelist" table. */
    private static final String FIRST_COLUMN_XPATH =
            "//*[@id=\"freelist\"]/table/tbody/tr/td[1]/text()";

    /** XPath selecting the text of the second cell of every row in the "freelist" table. */
    private static final String SECOND_COLUMN_XPATH =
            "//*[@id=\"freelist\"]/table/tbody/tr/td[2]/text()";

    /** Retry count applied to the {@link Site} configuration. */
    private static final int RETRY_TIMES = 3;

    /** Values taken from the first table column of the most recently processed page. */
    private List<String> ips = new ArrayList<>(10);

    /** Values taken from the second table column of the most recently processed page. */
    private List<String> ports = new ArrayList<>(10);

    /**
     * Extracts both table columns from the page and stores them, replacing
     * whatever the fields held before.
     *
     * @param page the downloaded page handed over by WebMagic
     */
    @Override
    public void process(Page page) {
        this.ips = selectAll(page, FIRST_COLUMN_XPATH);
        this.ports = selectAll(page, SECOND_COLUMN_XPATH);
    }

    /**
     * Builds the site configuration used for crawling.
     *
     * @return a new {@link Site} with its retry times set to {@value #RETRY_TIMES}
     */
    @Override
    public Site getSite() {
        final Site site = Site.me();
        return site.setRetryTimes(RETRY_TIMES);
    }

    /**
     * Runs the given XPath against the page's HTML and returns every match.
     *
     * @param page  the page whose HTML is queried
     * @param xpath the XPath expression to evaluate
     * @return all matched values, as returned by the selector's {@code all()}
     */
    private static List<String> selectAll(Page page, String xpath) {
        return page.getHtml().xpath(xpath).all();
    }

}
